// See the file "COPYING" in the main distribution directory for copyright.

#pragma once

#include <cstdint>
#include <memory>
#include <optional>
#include <vector>

namespace zeek {
class BrokerData;
class BrokerDataView;
} // namespace zeek

namespace zeek::probabilistic::detail {

/**
 * A probabilistic cardinality counter using the HyperLogLog algorithm.
 */
class CardinalityCounter {
public:
    /**
     * Constructor.
     *
     * The number of buckets of the data structure is determined using
     * the error margin and the given confidence.
     *
     * For example, assume an error_margin of 2% and a confidence
     * of 95%. If the Size function returns an estimate of 100, this
     * means that we are 95% sure that the cardinality is between 98
     * and 102.
     *
     * @param error_margin error margin
     *
     * @param confidence confidence of the error. Default: 0.95
     */
    explicit CardinalityCounter(double error_margin, double confidence = 0.95);

    /**
     * Copy-Constructor
     */
    CardinalityCounter(CardinalityCounter& other);

    /**
     * Move-Constructor
     */
    CardinalityCounter(CardinalityCounter&& o) noexcept;

    /**
     * Constructor for a known number of buckets.
     *
     * The error margin is 1.04/sqrt(size) with approximately 68%
     * probability.
     *
     * @param size number of buckets to create
     */
    explicit CardinalityCounter(uint64_t size);

    /**
     * Destructor.
     */
    ~CardinalityCounter() = default;

    /**
     * Add a new element to the counter.
     *
     * The hash function generating the hashes needs to be uniformly
     * distributed over 64 bits.
     *
     * @param hash 64-bit hash value of the element to be added
     */
    void AddElement(uint64_t hash);

    /**
     * Get the current estimated number of elements in the data
     * structure
     *
     * @return Estimated number of elements. Returns -1.0 if there are
     * zero buckets.
     **/
    double Size() const;

    /**
     * Merges the argument cardinality counter with this one. The error
     * margins of both counters have to be the same, otherwise the merge
     * operation will not be carried out.
     *
     * @param c Cardinality counter to merge into the current counter.
     *
     * @return True if successful
     */
    bool Merge(CardinalityCounter* c);

    std::optional<BrokerData> Serialize() const;
    static std::unique_ptr<CardinalityCounter> Unserialize(BrokerDataView data);

protected:
    /**
     * Return the number of buckets.
     *
     * @return Number of buckets
     */
    uint64_t GetM() const;

    /**
     * Returns the buckets array that holds all of the rough cardinality
     * estimates.
     *
     * Use GetM() to determine the size.
     *
     * @return Array containing cardinality estimates
     */
    const std::vector<uint8_t>& GetBuckets() const;

private:
    /**
     * Constructor used when unserializing, i.e., all parameters are
     * known.
     */
    explicit CardinalityCounter(uint64_t size, uint64_t V, double alpha_m);

    /**
     * Helper function with code used jointly by multiple constructors.
     *
     * @param arg_size: number of buckets that need to be kept
     */
    void Init(uint64_t arg_size);

    /**
     * This function calculates the smallest value of b that will
     * satisfy these the constraints of a specified error margin and
     * confidence level.
     *
     * The exact expression for b is as follows:
     * Define x = 2*(log(1.04*k/error)/log(2)). Then b is the ceiling of x.
     *
     * After that initial estimate, the value of b is increased until the
     * standard deviation falls within the specified value.
     *
     * @param error error margin
     *
     * @param confidence confidence of the error
     *
     * @return minimal B-value satisfying the error-rate under confidence.
     */
    int OptimalB(double error, double confidence) const;

    /**
     * Determines at which index (counted from the front) the first one-bit
     * appears. The last b bits have to be 0 (the element has to be divisible
     * by m), hence they are ignored. Always adds 1 to the result. This is the
     * rho function from the original algorithm.
     *
     * @param hash_modified hash value
     *
     * @returns index of first one-bit
     */
    uint8_t Rank(uint64_t hash_modified) const;

    /**
     * flsll from FreeBSD; especially Linux does not have this.
     */
    static int flsll(uint64_t mask);

    /**
     * This is the number of buckets that will be stored. The standard
     * error is 1.04/sqrt(m), so the actual cardinality will be the
     * estimate +/- 1.04/sqrt(m) with approximately 68% probability.
     */
    uint64_t m = 0;

    /**
     * These are the actual buckets that are storing an estimate of the
     * cardinality. All these need to do is count when the first 1 bit
     * appears in the bitstring and that location is at most 65, so not
     * that many bits are needed to store it.
     */
    std::vector<uint8_t> buckets;

    /**
     * There are some state constants that need to be kept track of to
     * make the final estimate easier. V is the number of values in
     * buckets that are 0 and this is used in the small error correction.
     * alpha_m is a multiplicative constant used in the algorithm.
     */
    uint64_t V = 0;
    double alpha_m = 0.0;
    int p = 0; // the log2 of m
};

} // namespace zeek::probabilistic::detail
